Chapter 10 Gut microbiota: functional analysis

load("data/gut/data.Rdata")

sample_metadata <- sample_metadata %>% 
  filter(sample!="EHI02625")
sample_metadata$environment <- factor(sample_metadata$environment, levels=c("low", "high"))
treatment_colors <- c("#f56042","#429ef5")
genome_counts_filt <- genome_counts_filt %>%
  select(-EHI02625)
genome_counts_filt <- genome_counts_filt[genome_counts_filt$genome %in% rownames(genome_gifts),] 
rownames(genome_counts_filt) <- NULL
#Aggregate bundle-level GIFTs into the compound level
GIFTs_elements <- to.elements(genome_gifts[, !grepl("^S", colnames(genome_gifts))],GIFT_db) # Remove structure traits
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% genome_counts_filt$genome,]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>% 
  select_if(~ !is.numeric(.) || sum(.) != 0)

#Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered,GIFT_db)

#Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db)

#Get community-weighed average GIFTs per sample
genome_counts_row <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% 
  column_to_rownames(., "genome")

#genome_counts_row <- rownames_to_column(genome_counts_row, "genome")
GIFTs_elements_community <- to.community(GIFTs_elements_filtered,genome_counts_row,GIFT_db)
GIFTs_functions_community <- to.community(GIFTs_functions,genome_counts_row,GIFT_db)
GIFTs_domains_community <- to.community(GIFTs_domains,genome_counts_row,GIFT_db)

10.1 Metabolic capacity index (MCI)

GIFTs_functions_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = "sample") %>%
  group_by(environment) %>%
  summarise(MCI = mean(value), sd = sd(value)) %>%
  tt()
environment MCI sd
low 0.2687300 0.01151867
high 0.2657253 0.01442070
MCI <- GIFTs_functions_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = "sample")

shapiro.test(MCI$value) %>% 
  tidy()
# A tibble: 1 × 3
  statistic p.value method                     
      <dbl>   <dbl> <chr>                      
1     0.977   0.741 Shapiro-Wilk normality test
wilcox.test(value ~ environment, data=MCI) %>% 
  tidy()
# A tibble: 1 × 4
  statistic p.value method                       alternative
      <dbl>   <dbl> <chr>                        <chr>      
1       136   0.345 Wilcoxon rank sum exact test two.sided  
MCI %>% 
  ggplot(aes(x=environment, y=value, color = environment, fill=environment)) +
    geom_boxplot(alpha = 0.2, outlier.shape = NA, width = 0.3, show.legend = FALSE) +
    geom_jitter(width = 0.1, show.legend = TRUE) +
    scale_color_manual(values=treatment_colors)+
    scale_fill_manual(values=treatment_colors)+
  theme_minimal() +
  theme(
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank())

10.2 Wilcoxon

10.2.1 Community elements differences:

element_gift <- GIFTs_elements_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  left_join(sample_metadata %>% select(sample,environment,river), by=join_by("sample"=="sample"))
uniqueGIFT_db<- unique(GIFT_db[c(2,4,5,6)]) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)

significant_elements <- element_gift %>%
    pivot_longer(-c(sample,environment,river), names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    summarise(p_value = wilcox.test(value ~ environment)$p.value) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)%>%
  rownames_to_column(., "Elements")  %>%
  left_join(.,uniqueGIFT_db[c(1,3)],by = join_by(trait == Code_element))

element_gift_t <- element_gift  %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "trait")

element_gift_filt <- subset(element_gift_t, trait %in% significant_elements$trait) %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "sample")%>% 
  left_join(sample_metadata %>% select(sample,environment,river), by = join_by(sample == sample))

element_gift_filt %>%
  select(-c(sample,river))%>%
  group_by(environment) %>%
  summarise(across(everything(), mean))%>%
   t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "Elements")  %>%
  left_join(.,uniqueGIFT_db[c(1,3)],by = join_by(Elements == Code_element))
   Elements          low         high                                      Function
1     B0210 0.5209436000 0.4509265000               Amino acid biosynthesis_Leucine
2     B0215 0.4987544000 0.5656788000             Amino acid biosynthesis_Histidine
3     B0218 0.3182126000 0.2983654000              Amino acid biosynthesis_Tyrosine
4     B0219 0.0231044000 0.0186570700                  Amino acid biosynthesis_GABA
5     B0220 0.0655121800 0.0480075800          Amino acid biosynthesis_Beta-alanine
6     B0307 0.3575178000 0.3030128000 Amino acid derivative biosynthesis_Spermidine
7     B0706 0.4328692000 0.5003124000              Vitamin biosynthesis_Biotin (B7)
8     B0710 0.0526786800 0.0786377400       Vitamin biosynthesis_Phylloquinone (K1)
9     B0711 0.2579474000 0.3005016000         Vitamin biosynthesis_Menaquinone (K2)
10    B1012 0.0149240150 0.0065831860            Antibiotic biosynthesis_Fosfomycin
11    D0101 0.0928441900 0.0506893400                Lipid degradation_Triglyceride
12    D0104 0.1351958300 0.0936400900          Lipid degradation_Dicarboxylic acids
13    D0504 0.0211782000 0.0540603300             Amino acid degradation_Methionine
14    D0516 0.0780710300 0.0469275900           Amino acid degradation_Beta-alanine
15    D0606 0.0426329300 0.0253378300       Nitrogen compound degradation_Allantoin
16    D0610 0.0046945340 0.0091389410     Nitrogen compound degradation_Methylamine
17    D0704 0.3817523000 0.3135096000                  Alcohol degradation_Glycerol
18    D0709 0.0007728956 0.0000000000         Alcohol degradation_Polyvinyl alcohol
19    D0805 0.0029745460 0.0052878350               Xenobiotic degradation_Benzoate
20    D0812 0.0003603398 0.0007750193            Xenobiotic degradation_Naphthalene
21    D0816 0.1123401900 0.0953450600          Xenobiotic degradation_Phenylacetate
22    D0907 0.2604483000 0.2005187000           Antibiotic degradation_Tetracycline
23    D0910 0.3969376000 0.3309116000        Antibiotic degradation_Chloramphenicol
difference_table <- element_gift_filt %>%
  select(-sample) %>%
  group_by(environment) %>%
  summarise(across(everything(), mean)) %>%
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric) %>%
  rownames_to_column(., "Elements") %>%
  left_join(.,uniqueGIFT_db[c(1,3,4)],by = join_by(Elements == Code_element)) %>% 
  arrange(Function) %>% 
  mutate(Difference=high-low)%>% 
  mutate(group_color = ifelse(Difference <0, "Low","High")) 
means_gift <- element_gift_filt %>% 
  select(-c(environment,river)) %>% 
  pivot_longer(!sample, names_to = "elements", values_to = "abundance") %>% 
  left_join(sample_metadata, by=join_by(sample==sample)) %>% 
  group_by(environment, elements) %>%
  summarise(mean=mean(abundance))

log_fold <- means_gift %>%
  group_by(elements) %>%
  summarise(
    logfc_high_low = log2(mean[environment == "high"] / mean[environment == "low"])
    )
element_gift_names <- element_gift_filt %>%
  select(-c(environment,river)) %>%
   t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "Elements")  %>%
  left_join(.,uniqueGIFT_db[c(1,3)],by = join_by(Elements == Code_element))%>%
  select(-Elements)%>%
  select(Function, everything())%>%
   t()%>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "sample")%>% 
  left_join(sample_metadata %>% select(sample,environment,river), by = join_by(sample == sample))


colNames <- names(element_gift_names %>% select(-c(sample,environment,river)))
for(i in colNames){
  plt <- ggplot(element_gift_names, aes(x=environment, y=.data[[i]], color = environment, fill=environment)) +
    geom_boxplot(alpha = 0.2, outlier.shape = NA, width = 0.3, show.legend = FALSE) +
    geom_jitter(width = 0.1, show.legend = TRUE) +
  scale_color_manual(values=treatment_colors)+
  scale_fill_manual(values=treatment_colors) +
  theme_minimal() +
  theme(
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank())
print(plt)
}

difference_table %>%
  ggplot(aes(x=forcats::fct_reorder(Function,Difference), y=Difference, fill=group_color)) + 
  geom_col() +
#  geom_point(size=4) + 
  scale_fill_manual(values=treatment_colors) +
  geom_hline(yintercept=0) + 
  coord_flip()+
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 12),
        legend.position = "right", 
        panel.background = element_blank(),
          panel.grid.major = element_line(size = 0.15, linetype = 'solid',
                                colour = "grey"))+
  xlab("Microbial Functional Capacity") + 
  ylab("Mean difference")+
  labs(fill="Elevation")
uniqueGIFT <- unique(GIFT_db[c(2,3,4,5,6)])

code_function <- difference_table %>%
  left_join(uniqueGIFT[c(1:3)], by=join_by(Elements==Code_element))

unique_codes<-unique(code_function$Code_function)

gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  filter(Code_function %in% unique_codes)%>% 
  mutate(legend=str_c(Code_function," - ",Function))

code_function %>%
#  mutate(Difference_abs = abs(Difference)) %>% 
  left_join(significant_elements, by=join_by(Elements==trait)) %>%
  left_join(log_fold, by=join_by(Elements==elements)) %>% 
  left_join(gift_colors, by=join_by(Code_function==Code_function)) %>% 
  ggplot(., aes(x = logfc_high_low, y = -log(p_adjust), color=legend, size=abs(Difference))) +
  geom_jitter(width = 0.2, height = 0.2)+
  geom_vline(xintercept=0) +
  scale_color_manual(values = gift_colors$Color)+
  #xlim(c(-10,4)) +
  theme_classic()+
  labs(size="Mean difference (abs)", color="Functional trait")+
  labs(x = "Log-fold change", y="-Log adjusted p-value") +
  geom_text_repel(aes(label = Element), min.segment.length = 0.4, size=2.5, max.overlaps = Inf)

10.2.2 Community functions differences

function_gift <- GIFTs_functions_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  merge(., sample_metadata %>% select(sample,environment,river), by="sample")
unique_funct_db<- GIFT_db[c(3,4,5)] %>% 
  distinct(Code_function, .keep_all = TRUE)

significant_functional <- function_gift %>%
    pivot_longer(-c(sample,environment,river), names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    summarise(p_value = wilcox.test(value ~ environment)$p.value) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)%>%
  left_join(.,unique_funct_db[c(1,3)],by = join_by(trait == Code_function))
function_gift_t <- function_gift  %>% 
  select(-environment)  %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "trait")

function_gift_filt <- subset(function_gift_t, trait %in% significant_functional$trait) %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "sample")%>% 
  left_join(., sample_metadata %>% select(sample,environment,river), by = join_by(sample == sample))


function_gift_filt %>%
  select(-c(sample,river)) %>%
  group_by(environment)  %>%
  summarise(across(everything(), mean))%>%
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "Code_function")  %>%
  left_join(unique_funct_db[c(1,3)],by = join_by(Code_function == Code_function))
  Code_function        low       high            Function
1           D01 0.09715327 0.06631964   Lipid degradation
2           D07 0.23552990 0.19997740 Alcohol degradation
function_gift_names <- function_gift_filt%>%
  select(-c(environment,river)) %>%
   t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "Code_function")  %>%
  left_join(.,unique_funct_db[c(1,3)],by = join_by(Code_function == Code_function))%>%
  select(-Code_function)%>%
  select(Function, everything())%>%
   t()%>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "sample")%>% 
  left_join(sample_metadata %>% select(sample,environment,river), by = join_by(sample == sample))


colNames <- names(function_gift_names)[2]
for(i in colNames){
  plt <- ggplot(function_gift_names, aes(x=environment, y=.data[[i]], color = environment, fill=environment)) +
    geom_boxplot(alpha = 0.2, outlier.shape = NA, width = 0.3, show.legend = FALSE) +
  geom_jitter(width = 0.1, show.legend = TRUE) +
    scale_color_manual(values=treatment_colors)+
    scale_fill_manual(values=treatment_colors)+
  theme_minimal() +
  theme(
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank())
print(plt)
}

10.2.3 Community domains differences

domain_gift <- GIFTs_domains_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  merge(sample_metadata %>% select(sample,environment,river), by="sample")
unique_domain_db<- GIFT_db[c(4)] %>% 
  distinct(Domain, .keep_all = TRUE)

significant_domain <- domain_gift %>%
    pivot_longer(-c(sample,environment,river), names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    summarise(p_value = wilcox.test(value ~ environment)$p.value) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)
domain_gift_t <- domain_gift  %>% 
  select(-environment)  %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "trait")

domain_gift_filt <- subset(domain_gift_t, trait %in% significant_domain$trait) %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "sample") %>% 
  left_join(sample_metadata %>% select(sample,environment,river), by = "sample")


domain_gift_filt %>%
  select(-c(sample,river)) %>%
  group_by(environment)  %>%
  summarise(across(everything(), mean))%>%
   t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "Code_domain")
  Code_domain       low      high
1 Degradation 0.1839132 0.1748753
domain_gift_names <- domain_gift_filt%>%
  select(-environment)%>%
   t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "Code_domain")  %>%
  # select(-Code_domain)%>%
  # select(domain, everything())%>%
   t()%>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "sample")%>% 
  left_join(sample_metadata %>% select(sample,environment,river), by = join_by(sample == sample))


colNames <- names(domain_gift_names)[2:3]
for(i in colNames){
  plt <- ggplot(domain_gift_names, aes(x=environment, y=.data[[i]], color = environment, fill=environment)) +
    geom_boxplot(alpha = 0.2, outlier.shape = NA, width = 0.3, show.legend = FALSE) +
  geom_jitter(width = 0.1, show.legend = TRUE) +
    scale_color_manual(values=treatment_colors)+
    scale_fill_manual(values=treatment_colors)+
  theme_minimal() +
  theme(
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank())
print(plt)
}